{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Machine Learning Overview"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.collections.PathCollection at 0x7f058a9af7b8>"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAD8CAYAAABXe05zAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3X+QVOWZL/DvM93TDTMjoYDhx4QfjVwUCCsJMIxoslFCtiBisveaKt14QTReqszd3GxtbkWXuzfejBWupmq3bl2TikUiKuqV2oq7q0yAG7kYXBNgGCxGYUAUHMEdkBlYlJmBnunu9/7R3WPTc368p7vP6XO6v58qS6BP97xvn+mn3/Oc531fUUqBiIiCo6bcDSAiImcYuImIAoaBm4goYBi4iYgChoGbiChgGLiJiAKGgZuIKGAYuImIAoaBm4goYMJuvOikSZNULBZz46WJiCrSoUOH+pRSjTrHuhK4Y7EYOjo63HhpIqKKJCIf6h7LVAkRUcBojbhFpBvAZQBJAAml1FI3G0VEROacpEpuV0r1udYSIiLSwlQJEVHA6AZuBeB3InJIRDYYHSAiG0SkQ0Q6ent7S9dCIiK6hm6q5FalVI+ITAbwmogcV0q9kXuAUmozgM0AsHTp0kDtztAfT6CtswfdFwYQm1iPNYua0BB1peCGiKhoWtFJKdWT+f95EfknAMsAvGH9rGA42H0R659ph1LA4FASdZEQHvttF569fxmaYxPK3TzSxC9fqia2v9kiUg+gRil1OfPnPwPQ6nrLPNAfT2D9M+0YiCdH/m1wKP3n9c+0o33jStRX6Ie/kgIdv3yp2uh8UqcA+CcRyR7/f5RSu1xtlUfaOntgtuWmUkDb2z24u3mmt43yQCUFumr+8qXqZXtzUil1Sim1KPPfF5RSP/WiYV7ovjAw8iHPNziURHffoMctcl9uoMv2fXAoiYF4MvPviTK30BmdL1+iSlPV5YCxifWoi4QMH6uLhBCbVOdxi9xXaYGuGr98iao6cK9Z1IR0Bmg0EWDNTU3eNsgDdoHupfYz2NZ+Gv0BGXlbfflGQoKpnxvjcYuI3FfVgbshGsaz9y9DfTQ08uGvi4RQHw1l/r3ycqNWgQ4ADp+5hNa2LrRs2o2D3Rc9bFlhrL58h5IKT+w6Foh+EDkhyuy6uQhLly5VQVodcCCeQNvbPejuG0RsUh3W3NRUkUEbSOe4WzbtvuZmnpn6aCgQN/cOdl/E+i3tGDC5kghKP6i6icgh3XWgqnrEnVUfDePu5pl4ePU83N08MzAf8P54AtvaT+Pxnce00xtGVxlm/JLztutnc2wCfrTqRkRCxkNvv/SDqFSCEaFolGJK+ppjE9C+cSXa3u7BS+1ncPjMJcPj/HBzT7efZz+5iqGk8dWjH/pBVEoccbuokBGx7usWW9KXvcq4p3mGbytrnPSzGiuEqHoxcBfJLDgf7L6Ilk270drWhaf2nirpDb9SlvRZ3dwbTqZwdThZtgoTJ/2sxgohql4M3EUwDM4/3Y3WV4/iO7/a79okl1LWLlvlvIeTCk/serdsFSZO+lmNFUJUvfjbXCCrqdZb/tht+rxSTKXPpgWMglohaYFszvs3hz7CY21HkUh99lg5p4877Wdu7r4aKoSoenHEXSCry3grpbhR5kZaoD4aRjRcg0jYOE9cjsqMQvoZ1AohIid8FbjdupnnBqvLeCuluFHmVlrALjWx451znp4bpj+IjPlmAo5R2ZcIfLti3bb20/jJ9i5cGXYWvEs5GaTUE4e2tZ9Ga1uXafCOhARDSeX5uammCVJUvZxMwPFF4LaazefXWW/98QSWPPYa4rkJYQuRkKA2XOPbLyLA2axKwL/nhiiInARuX3zi/LQutu4GAw3RMFYtnIpXDtvnfcM1wN/esQB3LZletiCX7deJjy/j0uAwxtfV4oYp113Tv2xqIvfKJzvKNlLJa5YT
+ZkvArdfluZ0Ohtx+fUT8buj53Bl2HjUbTXK9nIHmmy/Ekl1zRVCNFwz0r/508bhN4c+wuvHP8YXp4/HxIYIGhvG4P3ey9h7os/wdbN57ztuCu7uOURB5ItPW6nL2wpRyE4qaxY14bHfdhm+ntUo28sdaIz6lRVPpBBPAN/51X6kUgr5A+sxtTVYf0sMB7v/zfSLdd/JPrRs2u3rFBBRpfFFVYkfZr0VMhvRqurhpQ3Lse6W2Kig7fUONDpli8PJ0UEbAK4Op/DcHz+AyakBkF46Nai75xAFlS9G3Ea51dzKBS/ywoWma5xO+nAjn2+Vdim0bDErmQLu+/IsPL//QwwnUo7y3f3xBF7uOIM9754HANw+bwq+vWQ60ypERfLNJ6jcs96KSddkJ33oKHU+P5t2SaUUrgynEK4BfvzqEfx6XTP+9IZGy37pGEoqCATtG1fiey8essx357b9YPdFrH36AK7m5P/3nujD4zuP4fnvtjCtQlQEX6RKsso5680P6Rqn+fzctEv2BmkiBQwlFNZtaccbJ3ot+6UjEhLEJtWhPhrG6oXTtFbg648ncN+Wa4N21tXhVHrTA6ZViArmq8BdTl7M0uuPJ7B1X7fp4wJnXxBtnT1IpcwT2P9pawcEGOlXNOz8dIdDMtIm3S+3ts4eJExSKkB61UFubEBUON+kSvygOTYBr//wNjyx6zhO9g5gTmM9Hl41D5PHlWbD2bbOHsDiVt/a5bMcfUF0XxgwLUUEgGRKjeSds2mo9z7ux78NDqE+Gsa29tOmOWsgXVXy3AMtI23SvRfRfWHA8nWHkgrdfYO2JZFOSia9LK8kKjf+ZufIL9M78fFl7Dp6rmSlbnY3CsWyfmO02MR6hASGFSEAkEipkbxzfh7+YPdF/EPHmVFPFgCLZnwO/2Hx53HX4hmjvkh07kXEJtZbTtyJhAQKCi2bdpuWRDopmfSyvJLID5gqyfCiTM/JLi398QSe+8MHuG/LAdy35QCe/WP3qIWdmsaPNQ3aADC21jhnnu2rUQ56bKQGLz54M9Ytn206+re7F7FmURPCJvs/AkA4VIOt+7pN3+vzn17VPhdel1cS+QFH3BleTLu3mrCTmyM2q8j4nzu6cP+tswEAUz83Fj/bdczy59XUGOfM7fr641eOoPG6aMEph4ZoGM890DKqD0A6/bLu5lnYuv9D05//xK7j2ufCT8slEHmFgTvDi2n3Ojliq4qMeELhqb2nAMAyFQGkp7Ob3VS16uuV4RReOdyDREoVlXJojk3Aob/9Ol5+6wz2HOsFAKyYNxl3LZmOJ/e8Z/len+zt1z4XflkugchLDNwZXk27z80RnzjXj0tXhvC5sbU4eb4f86eNs63IyLIK2gCw9uZZpsHWrrY7kalUKXb3m/poGOuWz8a65bO1f35dJIQ5jQ048bFx8M4/F35YLoHIa8xxZ3hZx10fDeP6xgZs6ziNnUfOYcsfukc2E9536oJtULZTFwlh7pQG08ed1nbnTvkvxWYXdu/1w6vmaZ8LP9TfE3mNgTvDy91WrG6o7TpyDrUWN/Z02AUso75alXhnUw6l2Lk+W7a3cv4URMKCsbXpH5z7Xk8eN0b7XHCXHKpGvthIwU+82G3FaqeZsbUhJFJJONlYp9CdaXL7ev7yVex455zhjj6RkGDZ7Alo776IocTo3xfdDRXyy/bG1oaQUgqrF07F8jkTR73XTs4Fd8mhoAvcDjjV5vGdx0ZuMhr58y82YdfRc4Y3KPPVR0J4ePWNOHspXlTAcrr7Ta66SAiP3rnAsnojiLscEXkpcDvgVBu7G2rL50zET//9n1xTkTGnsR7bDp6Bgjt7chpVvOjSqd7wY9keZ1tSUGn/lopICEAHgH9VSq1xr0mVT6ee26gi46//7EZX0wG5FS873jmHfSf7tG6U6lRv+K1sj7MtKcic3Jz8AQDrGR+kpdAbal6snpj9GfOnXadd3aJTveFk1qjbONuSgk7rky8i0wHcAeCn
AP7a1RZViULWH/fy0l5nHW8nm13ozhrNcrOvfkzbEDmh+0n4XwB+BOA6swNEZAOADQAwcyZ/6XU42YDB60t7q0AbDddg7c2zMHdKg3a6xskuR2731W9pGyKnbD9xIrIGwHml1CERuc3sOKXUZgCbgXRVSclaSAVtZFwsu0BbSADVucrwoq9WVxNmC3MR+YnOJ+BWAN8UkW8AGANgnIi8oJT6j+42rTKU4pK/XJf2bmwnZ3eVUe7Fvq4MJzFt/Fjb12BFCpWT7W+aUupvAPwNAGRG3P+VQVtPqS75y3lpbxZo3QpcXi329ct7l2DdlnbDxx964ZDlyJ4VKVRunPLuklJWLvipIgNASaa+m/Gqrz2XroxMt8+XuzZLPlakkB84CtxKqd+zhluPziV/lt3CTX5aSMntwOVVX622fbMa2Ts5r0Ru4YjbJbqX/DqjVz8tpOR24PKqr4WO7FmRQn7Auyku0Vkn2kkFhRs3CgvhReDyoq9O68qzuP43+QFH3C7RueR3Onr1YuakHa9y0G73tdCRvZ/SVlS9GLhdYhcYFICdR84G7rK7kgJXdmT/6J0L8NBX5+DROxegfeNKy8oQP6WtqHpxWVeXGa0T3XX2U6x/ph3DiZTpeiA6S6WWi1E5XClXKgwCrv9Npcb1uH1Md91rv69RzcBFVFpcj9vHrPLaQHq3mVqLHdr9wsk6K0RUWv6NDBXKqioDAG6ZMwm/uHexr4M2EZUXb056zK4qY/WfTGXQJiJLDNweq6SqDCIqDwZuj7GcjIiKxShRBn6ZBUlEwcRIUSasyiCiQjFVQkQUMAzcREQBw8BNRBQwDNxERAHDwE1EFDAM3EREAcPATUQUMAzcREQBw8BNRBQwDNxERAHDwE1EFDAM3EREAcPATUQUMAzcREQBw8BNRBQwDNxERAHDwE1EFDAM3EREAcPATUQUMAzcREQBYxu4RWSMiLSLSKeIHBWRn3jRMCIiMqazy3scwAqlVL+I1AJ4U0R2KqX2u9w2IiIyYBu4lVIKQH/mr7WZ/5SbjSIiInNaOW4RCYnIYQDnAbymlDrgbrOIiMiMVuBWSiWVUl8EMB3AMhFZmH+MiGwQkQ4R6ejt7S11O4mIKMNRVYlS6hKA3wNYZfDYZqXUUqXU0sbGxhI1j4iI8ulUlTSKyPjMn8cCWAnguNsNIyIiYzpVJdMAPCciIaQD/T8opdrcbRYREZnRqSp5G8CXPGgLERFp4MxJIqKAYeAmIgoYBm4iooBh4CYiChgGbiKigGHgJiIKGAZuIqKAYeAmIgoYBm4iooBh4CYiChgGbiKigGHgJiIKGAZuIqKAYeAmIgoYBm4iooBh4CYiChgGbiKigGHgJiIKGAZuIqKAYeAmIgoYBm4iooBh4CYiChgGbiKigGHgJiIKGAZuIqKAYeAmIgoYBm4iooBh4CYiChgGbiKigGHgJiIKGAZuIqKAYeAmIgoYBm4iooCxDdwiMkNEXheRYyJyVER+4EXDiIjIWFjjmASAHyql3hKR6wAcEpHXlFJdLreNiIgM2I64lVJnlVJvZf58GcAxAJ93u2FERGTMUY5bRGIAvgTggBuNISIie9qBW0QaALwM4K+UUp8aPL5BRDpEpKO3t7eUbSQiohxagVtEapEO2i8qpf7R6Bil1Gal1FKl1NLGxsZStpGIiHLoVJUIgKcBHFNK/b37TSIiIis6I+5bAawFsEJEDmf++4bL7SIiIhO25YBKqTcBiAdtISIiDZw5SUQUMAzcREQBw8BNRBQwDNxERAHDwE1EFDAM3EREAcPATUQUMAzcREQBw8BNRBQwDNxERAHDwE1EFDAM3EREAcPATUQUMDqbBRORhv54Am2dPei+MIDYxHqsWdSEhqjxR8zJscU8hyqTKKVK/qJLly5VHR0dJX9dIr862H0R659ph1LA4FASdZEQRIBn71+G5tiEgo81e04kJFAAvnvrbPzl1+YygFcAETmklFqqdSwDN1Fx+uMJtGza
jYF4ctRj9dEQ2jeuRH0msDo5Vuf1AaAuUoPnHmgxDfrZ1+Bo3d+cBG7muImK1NbZA7Pxj1JA29s9BR2r8xwAGBxKYf0z7RiIJwwfP9h9ES2bdqO1rQtP7T2F1rYutGzajYPdF81flHyNgZuoSN0XBjA4ZDwaHhxKortvsKBjdZ6TZRb0++OJTFBPjrzG4FASA/GkZbAnf2PgJipSbGI96iIhw8fqIiHEJtVpHRsJCbrOfoJt7afRnxNQrZ6TZRb0CxnhV5v+eALb2k/j8Z3HRr33fsXATVSkNYuaICa7sooAa25q0jp2KKmw90TfqFSG1XOy8r8gsgoZ4VeToKaRGLiJitQQDePZ+5ehPhoaGRnXRUKoj4Yy/x62PDZffipj5DkWo+78L4gsJ1cD+YI4EnVCN43kx/eBVSVEJTIQT6Dt7R509w0iNqkOa25qGlUhkn/sjnfOYd/JPgwlR38O6yIhPHrnAtzdPHPkOU/ueQ+//pdTqBHBUFLZlhIWUsUCFFayGDTb2k+jta3L8Iok+95f39jg2fvgpKqE9UBEJVIfDY8EWd1jP+gbwN4TvYbH5Kcy6qNhPLJ6Pr6/Yq72F0R2tG4WfIyelzsSzW0LAKx/pt002AeNXRrpxMf9aG3r8uX7EPx3nyhA8uupp40bg7pIyHTUZ5TKcPIFAQDNsQlo37hSO9jr3NB08vP9KptGMnvvLw0O+fZ9YOAm8ohh+gFAyiQ6mOWtC+Ek2FfLDc01i5rw2G+7DB8TAcbX1fr2fWDgJvKAVfphTG0N6iI1AEQrleE2u5Go0VVAEGdm2qWRTp7v13ofytF3f7+zRAGg88G1Sj/UiOCRVfMQra3RSmW4zW4kmn8VYHQl8dhvu3x/I7M/nsDJ8/24e+kMfHJlGOPHRnDD1IaR937+tHG270O5+s7ATVQE3Q+uXfrh7CdX8fDqeUW1pVQjPyc3NO1uZL7+w9uw5/h5343Erapmsv2zex8UULabuCwHJCqQk1I7ndKzYm50uVG+p1PeaNWvaDg9TSRUMzoFVM6RuNMSSbP3odTnlOWARB7Qqb6446YmtHX24MTHl5FM6d+EdLq2d7EjP7OfZxd4rK4k4onUNX/XaZMX+WKnVTNmN3bLeROXgZuoQHYf3H0nL6C1rWtkFJwdgUbDNYgnUiMj0F/euwTbc4JV0/ixeOjFQ9p502LL94rJ01rdyDRj1iav8sWlCriF3MQtFQZuogJZfXDH1oaw48hZDCU+i6jZEaiCwoNfno25UxowbfxYPPTCZ0F6bG0Nrgw7G6kWE4iKHa1b3cg0Y9QmLyf9lCrgOr2JW0pcq4TIgtU6FVaLP6WUQsjkwXBNDeZOacAdNzXhoRcOXbNWRn7QzmW2ml8x65EUu3qg0dorkZAgJEC4xrj/Rm3ychVDJ4uCWXGyRk2p2QZuEdkiIudF5IhrrSDyIbuV46w+uKsWTjUNwtkRp90GCWbPy1dMICpF2iA7M3Pd8lnIZIOQVEDCLKcP4Opw8povQ7t27HjnXMkWdyplwM32/dE7F+Chr87Bo3cuQPvGla7ffNVp4bMAfg5gq6stIfIRnUt3BeDk+X7c0zwDlwaHMb4ughumpOuAt3f24LWujy0vxz/os98gweh5+QpZjySrVGkDBeD5/R8inQ0yDth1kRBSSiGpFJ7Y9e41eey1N8+yzJXvO9mHlk27S5bvdroMgBWnSxCUgm0rlVJviEjM/aYQ+YfdpfuTe97D8/s/NA2UOvnP7Z09jm7sWY2eCw1EpcrTWr1fkZDgljmTcPu8yXhi1zEMDn12JZLt+9Z93RCYLzo+lFQYSiZLmu8uR8AtFea4iQzYXbo//eYHtus4r22ZhdqQIBJKB6T8y3G7DRLG1jq7jM8GoodXz8PdzTO1glup0gZW79dQUmH+tHGZqhrjDisFrF0+C/XR0Mj7ZXYcd+0pYVWJiGwAsAEAZs4M5rcYUZZVCsEusOSOxoeTCpGQIFwDrFs+C99fMdd2
Zh6g8BfLZuL98/0ABCvmTca3l0xHfTTsSp1zKdIGOikXq9TQleEUzn1yFe0bV+J7Lx7C3hN9hseVe3Env9CaOZlJlbQppRbqvChnTlLQWc2uC9cACfPiD9PHzTYuyJ2Zp6Dw/L4PoTA6BQPAt5sb6MxG3N7Zg59sP2p60zYarsFb//3r2N7Z4+osU79yMnOSqRIiA1YphAe/cr3lhr81JvkPs8v8bIrjP6/4d9i6rxsDQ6NTMPdtOYD1W/y1W3tuqWRbZw9+ee8Sy5TLmkVNSFoMFGtE0PZ2T8nK9SqZ7bWQiLwE4DYAk0TkIwCPKqWedrthROVmlkLIVlAYUUinR4zYXeb//P+9d82Nu1yJpIJZtcZwIoXvvXgIqxdO82wRp/xZjpGQIKUU1t0SQ2xCHc5+Eh+VcmmIhrF64TS8ctg4R31lOP3+FFMlUy24yBRRAcwWdVrbMgtb93/o+DK/P57Al1p/Zxr0dXiVOrFKiwBAfSSEZx8wbsOzf+zG/3j1qOlrt37zC1h3SwyAsz08KwEXmSJymeVo/IDxaNzqMr+ts8eiGA4ISXqVPaNNhbO8WlLUbuLQwJB52Z7YDRTls8eDXK7nNua4iQrQH09ge2cPPugbwKyJdbgjZzRoVwZopPvCgGVQFgFqQ3ofV7dL5qxK/+zacPbTq5bPO3spXlTbqgVH3EQOma1i9/CqeXhi13HbMkAjdqvsPfiV6/G1+VOu+blm3C6Z01kR0KwN5VxRr5JwxE3kQO5U+Pzqjh+/cvSafx9KKiRS5jcyc1lVUtRHQvj+irnXrIvx1RsaTevJ3Q6AdhOHzNrQH0/gaiKFoYRxwGfFiD4GbiIHnC4MBVinLrIldT/f8x7WtsxCfcSgnO6Bz1Is2bzvL+5djNqw8cc3PwBarXBYiGzVR3qDY2P5bcgu2PWzXcdH1bh7taJeJeG7ROSATn43n1nawCjlAiisWx6DQGwrKdbePAu//pdTqJH0TUujkjm3Nidojk3Awf/2dTy55z08/eYHEMC0DUYLdmXVhgSPrL4Rdy2ewaDtAN8pIgcK2fHFLG1gtvrglj98gDd/tAKTx40xfL3cYJxIAZFQOgDm59Ld3pygPhrGI6vn4/sr5lqW7VldpdSGahANhxi0HWKqhMgBnfxuPqPcrVUwG0oofOVnr4+s+53LKMc+lFQYTqpRuXSvNiewW9yqnHszVioGbiIHrKbCt37rC9qr7NmlXOKJlOFUdifB2C8Bs5gdesgYr0+IHLJaTe+uxdO1ZvvppFyMNtV1EoxLWXrXH0/g5Y4z2PPueQDA7fOm4NtLpmtNr3ey5rcXu7xXAr4jRAUwm9WnO9tPZ5Ndo1Gxk2Bs9TOSKYXbb5xs204gnVNf+/QBXM1Z1W/viT48vvMYnv9ui+1NTt21R7za5b0SMFVCVAbZYBYJmyfMjUbFTlbOy03rRA1KB2//u98b5tFz9ccTuG/LtUE76+pwKrNioX15od3ejFb18eVa/dDPGLiJyqQ5NgFv/miFYVAFjG9qOt2xpjk2Aa//8DaovJUF44mUVlBs6+zJrExobDiZ0r7JaXUT08td3isBAzdRGU0eNwYvPNjiaOuw3NHrd788G6sXTsU9zTNw8ny/4eSaPcfPI1xj/FG3C4p2a6gMJVVJbnL65UZqUDDHTVRmhWwdVh8N4/rGBrS2ddnmhIsJirGJ9YiEzFcljISkJFUhXMPEGQZuIpfpVEo4XcLUyeSaYoLimkVNaG07ahq4a0M1I+mcYipCSrXbfLVgqoTIRdk1OlrbuvDU3lNobetCy6bdtjcF7TjJCRezFVhDNIznHmjBmNrRoWJMbc3IOirF9rNUu81XC+6AQ+QSnQ10Cw1Ij+88hqf2njJ9/KGvzsHDq+eN/N1sxx7dUruBeAIvv3UGe471AgBWzJuMu3J2ni9VP6tt15tc3AGHyAd0RsWF7vDiNP1RSB49V300jHXLZ2Pd8tmjHitlP7nrjR4GbiKXuFkpUUhO
2K2gyIoQ7zHHTeQSN9fo8FNOmGuReI8jbiKXuF0pUWz6o1RYEeI9Bm4il+iu0VEMP+SEvegnXYtVJUQuK6RSIoir5FVzRUgpOKkqYeAm8pliS/comJwEbt6cJPIRrpJHOhi4iXyEq+SRDgZuIh9hTTTpYOAm8hHWRJMOBm4iHylmQSiqHgzcRD7ipxmR5F/8LSDyGb/MiCT/4m8CkQ/5YUYk+ZdWqkREVonIuyLyvog84najiIjInG3gFpEQgF8AWA1gAYC/EJEFbjeMiIiM6Yy4lwF4Xyl1Sik1BGAbgG+52ywiIjKjE7g/D+BMzt8/yvwbERGVgU7gNqoqHTUpV0Q2iEiHiHT09vYW3zIiIjKkU1XyEYAZOX+fDmDUgglKqc0ANgOAiPSKyIclaWF5TQLQV+5GeIR9rUzsa3DM0j3QdllXEQkDOAHgawD+FcBBAN9RSh0tpoVBICIdusssBh37WpnY18pkO+JWSiVE5C8B/F8AIQBbqiFoExH5ldYEHKXUDgA7XG4LERFp4Fol1jaXuwEeYl8rE/tagVzZuoyIiNzDETcRUcAwcMN+LRYRWZ8pcTyc+e/BcrSzWCKyRUTOi8gRk8dFRP535n14W0QWe93GUtHo620i8knOOf2x120sFRGZISKvi8gxETkqIj8wOKYizq1mXyvm3JpSSlX1f0hXypwEcD2ACIBOAAvyjlkP4OflbmsJ+vqnABYDOGLy+DcA7ER60tXNAA6Uu80u9vU2AG3lbmeJ+joNwOLMn69Dunw3/3e4Is6tZl8r5tya/ccRdxWtxaKUegPARYtDvgVgq0rbD2C8iEzzpnWlpdHXiqGUOquUeivz58sAjmH0shQVcW41+1rxGLj112K5K3OJ+RsRmWHweCWotnVplotIp4jsFJEvlLsxpSAiMQBfAnAg76GKO7cWfQUq8NzmYuDWW4tlO4CYUuomALsBPOd6q8pDa12aCvEWgFlKqUUAngTwz2VuT9FEpAHAywD+Sin1af7DBk8J7Lm16WvFndt8DNwaa7EopS4opeKZv/4KwBKP2uY1rXVpKoFS6lOlVH/mzzsA1IrIpDI3q2AiUot0IHtRKfWPBodUzLm162ulnVsjDNzptVfmishsEYkAuAfAq7ka6Ve0AAAA6ElEQVQH5OUCv4l0Xq0SvQpgXaYC4WYAnyilzpa7UW4Qkaki6f3URWQZ0p+FC+VtVWEy/XgawDGl1N+bHFYR51anr5V0bs1U/Z6TymQtFhFpBdChlHoVwH8RkW8CSCB9w2t92RpcBBF5Cek77pNE5CMAjwKoBQCl1FNIL2vwDQDvAxgEcH95Wlo8jb5+G8BDIpIAcAXAPSpTkhBAtwJYC+AdETmc+beNAGYCFXdudfpaSefWEGdOEhEFDFMlREQBw8BNRBQwDNxERAHDwE1EFDAM3EREAcPATUQUMAzcREQBw8BNRBQw/x8QkWpCYpjvRgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# scikit-learn ships helpers for generating synthetic data sets;\n",
    "# make_blobs draws Gaussian blobs of points, which suits a clustering demo.\n",
    "# It is imported from the public sklearn.datasets package: the private\n",
    "# sklearn.datasets.samples_generator path was removed in scikit-learn 0.24.\n",
    "from sklearn.datasets import make_blobs\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "import seaborn as sns\n",
    "\n",
    "# 100 points around 2 centers; random_state=0 keeps the sample reproducible.\n",
    "X, y = make_blobs(n_samples=100, centers=2, cluster_std=0.30, random_state=0)\n",
    "\n",
    "# Visualize the raw points before clustering them.\n",
    "plt.scatter(X[:, 0], X[:, 1], s=50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.collections.PathCollection at 0x7f058a123fd0>"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAD8CAYAAABXe05zAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Xl8VNXZwPHfSTKTWRL2IMgiuICIymKUxQ3FhYogVn2tuCBFwVoVXn2rtvZttVattrWtS1W0VClS5RVRdgSR4goGFBBZBEFkD8iWlSzP+8fJNpk7yUwyk8mE55vPfBru3Ln3uTP1mZtznnOOERGUUkoljqR4B6CUUioymriVUirBaOJWSqkEo4lbKaUSjCZupZRKMJq4lVIqwWjiVkqpBKOJWymlEowmbqWUSjApsThomzZtpEuXLrE4tFJKNUkrVqzYJyIZ4ewbk8TdpUsXsrKyYnFopZRqkowx34W7b1iJ2xizFTgClADFIpJZt9CUUkrVVyR33BeJyL6YRaKUUios2jmplFIJJtzELcB7xpgVxpixsQxIKaVUzcJtKjlXRHYaY9oCC40x60VkadUdyhL6WIDOnTtHOUyllFLlwrrjFpGdZf+7F5gBnOOwz0QRyRSRzIyMsCpaGoWjHGUykxnGMG7hFj7m43iHpJRSNao1cRtj/MaY9PLfgcuAr2IdWEMoooiLuIg7uZPZzGYKU7iMy/gzf453aCoC2WRzEzfhK/u5iZvIJjveYSkVM6a2pcuMMSdi77LBNq1MFZHHanpNZmamJEId9+u8zjjGkUtuwHYPHraznda0jlNksVVIIf/H//EhH9KFLoxmNO1oF++w6qSIInrQg21so4giAFy46Exn1rEOF644R6hUeIwxK8Itta61jVtEvgV61TuqRuht3g5K2gBu3PyH//BjfhyHqGLrEIfoT3+2s50ccvDg4XEeZyEL6U//eIcXsVnMYi97K5I22GS+hz3MZjZXc3Uco1MqNo7pcsCWtCTJ4S0QhOY0j0NEsfcET7CFLeSQA0ABBeSQw43ciJB4C0evZW3FtVSVSy5fNY0WPaWCHNOJ+w7uwIMnaLsXLxdyYRwiir1pTKOQwqDtu9jFNrbFIaL66U530kgL2p5GGqdyahwiUir2junEnUkmf+SPePDQjGakk85xHMdCFpISm2lc4i6VVMftJZSQRRb/5J+sYU0DR1V3IxhBS1oGfF4GQzLJnM/5cYxMqdg5phM3wJ3cyS52MYUpzGQmO9jBmZwZ77BiZixj8eEL2GbKfkYzmru5m/70ZzjDA9qNGys3bj7jMy7l0optglBIIT3owVrWxjE6pWLjmE/cAC1owTCGMYhBJJMc73Bi6m7u5jIuqyidSycdN26KKeYIR8gllzzyWMQi/sSf4h1uWNrTnna0C/js8snnEIf4KT+NY2RKxUat5YB1kSjlgIluJzt5l3cRhKu4ig50CPu1q1jFcpbTkpaMZKTj3XUXurCFLdEMuU5KKOFrvsaPnxM50XGfVrTiAAeCtqeQwgEOOLaDK9WYRLUcUDVOE5nIeMZjMADcx338lb8yjnFhvb5X2c9e9jpW1oCtOIm3ucxlFKMooIASSuhGN97hHbrQJWC/muq1m/pfUerYo00lMSQIWWQxgxlRrdj4ju8Yz3gKKCC/7KeAAiYwga1sjehYbWnreBfrwhX3OvZv+IbruI597COHHPLJZw1ruIiLKKU0YN/RjA6qEEohhcu4DC/ehgxbqZjTxB0FG9nIh3zIEY5UbNvLXnrTm0EM4lZupTvdGcvYoIRTF9OZ7lhzLQhv83bEx3uN10gnvSLxefHSghZcyZVxre1+iZeCmnBKKWU/+1lKwBxn/Jbfcg7n4MePFy/ppNOVrkxiUkOGrFSD0KaSetjDHoYxjLWsxYWLoxzll/ySbnTjUR5lIxsDEs/rvM5ZnBV2c0YoJZQ4fgGUUkox
xREf72zOZiMbeZmXmcxktrKVfPL5L/6LTnTifd6nPe3rFXNdVB3GXt1udgf824uXJSxhOcv5ki85kRMZzOCQzUBKJTLtnKyHgQwki6yg5JJKquMgF4Ce9Kz3iL6NbKQ3vcknP2C7Fy9f8AXd6V6n477My0xgAnnkVWxLJpmBDAy6w20Ir/AKE5jgOJfMOtYFtXMrlcgi6ZzU25E62spWvuRLxzvCUEkb7Fwh9dWNbjzEQ3jxklz248XLr/hVnZM2wLM8G5C0wd7dL2c5e9hT37AjdiM30olOAW3XfvyMZrQmbXVMa1RNJQUUkEUW6aRzJmdWVEw0RvvZH/HoShcuRjAiKud/iIcYwQimMQ2A67iO0zm9Xses2kZfVRJJ3MEdLGEJbtyMZjS/5bcx7/Tz4mUZy/gbf+NN3iSddO7iLkYyMqbnVaqxazRNJW/wBmMZi8FQQgntac8c5tCNblGPLxoKKKAlLcMumfPipRWtWMlK2tI2xtHVzb3cy/M8z1GOBmxPKvspbz/34KE//fmAD+IRplJNUsI1lXzFV4xhDEc4wmEOk0sum9nMYAZTQkm8w3PkwUMmtb/H5XNmPMqjrGVto03aYO/i29O+Yki8q+ynfGRluQIK+LzsRynV8BpF4n6Jl4LahQXhEIcatFNsJSsZxjA60YlLubTWZcwu47KQAz8MBj9+fs7PWcpS7uO+uE4Vm08+a1nLbnYzi1m8wiusZ33APi5crGAFT/IkIxjBPdzDVVzl+FeFIKxiVUOFr5SqolEk7p3sDHlnvY99DRLDJ3zC+ZzPHOawne0sYhGXcinzmBfyNWMYgxt3wDaDwYePMYxhLnP5K38Net0ylnEJl5BBBv3pz3zmR/16qvojfySDDM7hHNrTnh/zY8Yznr70ZRSjWMISTuEUWtKSDDJ4nMe5hVv4E3+iH/1CtmUnk5yQc3grlfBEJOqPs846SyIxSSaJX/xCtZ9USZWdsjOiY9VVP+kXdH4EOVlOrvF1S2WpdJJO4hOfeMQjfaSPbJbNIff/WD4Wn/gCzuETn/xb/h3tSxIRkTflzaDzVf1JlmTH7UmSJH+QP8g+2SfNpbkYMQHPGzHiE590ls6yXJbHJHaljiVAloSZYxtF52QBBfSnPxvZWFGb7MfPeMbzGDUubxk1HjyOZXwGQyGFNc6FIQib2UwqqXSiU43nGchAPuXToO0d6MD3fB/1Spre9K5zk4YbN3vZy/d8zxjGsIIVjn8ZNaMZ29jWZFcNUqohJFznpAcPn/AJf+APDGAAP+JHvMEbDZa0ATLIcNyeTnqtZX8Gw8mcXGvSBljNasfte9jjuARXbfawhxu5kTTSaEYzxjKWgxyseL76CMNIuHCxkpWczuksYxnP8EzQXN5ga73LyxKrWsEKxjCGfvTjJ/yE93lfm1aUioJGkbgBfPi4h3v4hE+Yy1yu5MoGPf+DPBiUlHz4mMCEqN4Fhxo67sXrmBRrUkAB/ejHNKaRSy5HOMIrvMJpnMZhDgNwHufVedh3KaUBVTAHOejYUZlHXtAXxJ/5M+dyLpOYxHKW8yZvcjmXcwVX1GlYvlKqUqNJ3PF2J3fyC36BDx9ppOHFyzjG8Rt+E9Xz9KNf0LYkkriHeyKefnQ609nP/oBEKAi72MVpnMYRjvAYj+HHH/GxDYbudKcnPSu2ncd5jh2Vfvycx3kV/97Nbh7ioaCmpxJKWMIS/sW/IopFKRVIE3cZg+FhHmYf+1jJSrLJ5mmejupczvvYx3SmB21PIokzOCPi461iVcjmlb3s5QVeoDvdWclKRjKSEzmR8ziPUYxiMINDrj+ZQgqZZDKXuQHbz+d8zuXcgL8MfPjoT38GMahi20IW1jjH96u8WvHvPPIaxbzfSiUSTdzVuHGTTTaf8mnQvB31tYQljp2cxRQzgxkRH68HPRxXqQcooqjiS+JkTmYyk9nMZj7kQ17lVd7jPW7mZlJIqRhkcw3XsJSlbGADy1ke1KxjMMxmNk/x
FH3pSx/68CRPMoc5Ac1JPnw1Ni+lkMJ61jOQgTSnOc1oxhVcwS52VexTPkXtlVzJEIYwlak1DsY6ylFmMpPXeC0hV6tXKiLhlp9E8oi0HLCx+EK+kPbSXtIlXZpJM/GLX16X16N2/HkyT5pJM8fSu7EyNuLj5UiOtJJWIUv9rpArHF9XKqVynVxXUYJpxIhXvPKkPFnfSxQRkVzJlTRJc4zJK16ZKBOlpbQMKDFMkRQ5UU6UYikWEZHRMjqgRNQvfhkmw6RUSoPOt1JWSitpJemSLmmSJh7xyP1yf1SuRamGQgTlgJq4yxRKobSW1o6JZp2si9o5nBKtT3zyuXxesV+JlMh8mS8Py8PygDwgb8vbki/5AccqlVJ5VB4Vj3gcE6Rf/DJf5jvGsUAWhKyb3yE7onKtS2RJUP24W9xyvVwvz8gzjrXl6ZIuc2WurJJV4hWv4zV9IB8EnKdYiqWdtHPcd57Mi8q1KNUQIkncjWp2wHh6j/ccp2gtoohXeCUqK567cTOf+QxhCMUUIwhFFPEYj1XMe5JNNudzPt/ybUU8SSSRTjozmcl2tvMDP7Cf/TzFU0Htw27cJJHEr/gVl3O5Yxxv83bQHNfl57mYi9nLXrrQhd/xuzpX91zIhexjHzOYwVKW0o52DGMYZ3EW93CPYzNUEUVsZjPrWe+4UEQuubzHewHt6ctY5ngtueQykYkMYUid4leqMdPEXeYABxyTRTHFZJMdtfOczdnsYheLWUwOOVzERbSmdcXzd3AH3/BNQCyllHKIQwxiEH78FFNMIYWONdEGwza2haxLB0gjjWSSg9qM88lnAxsA+35cz/W8xEvcxE11ulYvXkaW/VR/D9JIC+pYTSGFXvRiM5tx4QqqSvHgCXivymMO1Z4eappapRKddk6WGcQgxzvuNNIYzvConsuNm570ZAUruJVbeYzH2Mc+SihhJjNDrkspCDnkUEBByIEsJZTUOk/2KEYFzbHiJI887uf+qA+auY7raEObgI7aVFI5gzM4j/NCLlKcRFLQl8AABjh2Wvrx67zdqsnSxF2mE524l3vx46/Y5sdPb3pzFVdF9VwrWMFpnMbTPM1sZvN7fk8PerCFLfVOkh3oEHANTs7gDP7En/DgIb3sJ5TyFdbLbWIT7/Iu61hX5xg9eFjOcm7m5oqJre7mbhayEIOhGc2Yxzxa05pmZT/Nac5bvBVU6eLDxyu8ghdvxQjXNNLoS19u5MY6x6hUY9Yo5ippTBawgJd4iRxyuIEbuJEbw7o7jUQf+vAlXwZsSyKJH/NjfuAHPuCDOiVwHz6mMjXsL5p97GMhC/Hg4QEe4Bu+CdonnXQOcIASSrie65nPfNy4KaKIgQzkXd6t9YuinCAsZjHTmY4XL7dwC73oFXL/Yor5jM8oppiBDKzxc9jABv7BP8gmm+EMZxjDIl6hSKl4imSuEq0qaWC5khtyRr50SZdNsklaS2tJkiTHfapXvPSW3tJZOstgGSxLZWmd45oqUx1nLXxYHpb9sl9uk9uCKlhSJVVuk9vCOn6plMpIGVlRzZIsyeIVr/xF/lLnmJVqSki02QGPJUUUkUZa0PJgAMdxHLvZzRGO8C/+xVzmcpjDZJJJD3owgQmUUEIhhaSRxgAGMIc5Nc5cGIkXeIFf82vyyCOFFO7iLraylRnMCLkAciqp5JFX63woi1jECEY4rti+hS20o11UrkGpRBXJHbf+LdnAXLi4lmuZzvSAZOjFyx3cAdjmiTvLfqq6giuYwhT2spdLuZTLuKzOE0g5+Rk/Yyxj+YEfaEELbud23uGdGletL6KIUkprjeMt3nIs20shhQUsYBSj6h1/pPayl5d5ma/5mv70ZxSjaEazBo9DqUiFnbiNMclAFrBDRBp26r4m5u/8na1s5Uu+JIUUjnKUIQzhV/yqxtd1oAMP8EBMY0smmQwyyCGHN3ijxqQNkElmWG3JfvwkkRRUMWMwIYftx9JqVnM+53OUoxRQwDu8w+M8
ThZZdKBDg8ejVCQiuV0bD/UoJVAVmtOcj8t+JjGJVazibd4OqxO0hBLHZpZoO8jBGu+i3bhJJ50XeTGs493CLY6TWpVSylCG1jnOuhrDGA5zuGIAUx55ZJPNL/hFg8eiVKTCStzGmI7AUOCV2IZzbOlNb67hGrrRrdZ9c8llDGPw48eLl770JYvY9SO0p71jtYjB0Ja2TGACa1lLH/qEdbxe9OIxHsODhzTSSCcdP35mMIM00oL238Qm/s2/+ZiPo15HnkceX/BF0PYSSpjDnKieS6lYCKtz0hjzFvAEkA78j1NTiTFmLDAWoHPnzmd99913UQ712HYpl/IhHwY0XaSRxhrW0IUuMTnnFKYwjnEVw9OTScaHj+Us51ROrdMxd7ObBSzAg4ehDA1K2iWUMIpRTGc6LlwIQkc6spjFIRehiNRRjpJGmuOAq7a0ZQ97onIepSIR1aXLjDFXAntFZEVN+4nIRBHJFJHMjIzQw62PRYKEHA0ZjvWs52M+DmpvLqSQZ3imvuGFdBM3MYtZDGYwXenKSEayghV1TtoA7WjHKEZxPdc73mm/wAvMYAYFFHCEI+SQwyY2cQM31OdSArhxM4xhQdU4ySTXeXi/Ug0pnKaSc4HhxpitwBvAxcaYKTGNqok4whFu4zZ8+HDhYhCD6jTicBObHEv+iihiDWuiEWpIF3Mxi1jEt3zLZCZzCqfE9HzP83zQBFTlA3H2sS9q5/k7fw9qwy+llAUsqHHeb7B37G/yJndwB0/wRMA84ko1hFrLAUTkl8AvAYwxg7BNJXpbEoYruILP+bziTnkpSxnAADayMWAtx9r0pKdjh2QqqY5LocWaILzP+yxkIW1ow43cyPEcH5VjO5UMgh1Zmk9+VM4B8AmfkEJKwF8xgvAd37GABVzBFY6vyyGHczmXb/mWHHLw4OExHmM+8wOWb1MqlnSukhj5gi9YycqgxFBIIROZGNGxutKVK7kyYPIog8GLl5/z86jFHI5iihnKUEYwgqd4iv/lfzmFU5jP/KgcfwQjHKtr2tKWjnSMyjnALvvm9CWRTz6rWBXydU/zNBvZWDF/SwEF5JLLSEZGvRNVqVAiStwiskRruMOznvWO61UWUBBU0ZBPPlvYUuPai1OZyi/4BW1piw8fQxnKMpZFrcMuXFOZylKWViS9QgrJI48buMGxsy9Sv+E3tKd9xbqWbtz48TOZyTUuhxapkzjJsY3dh4+TOCnk66Yy1fFz2s9+x7lelIoFveOOkZ70dGwr9eKtWDShlFJ+yS9pQxvO4Aza0IZHeMTxzs2Fi0d4hD3sIZdcZjErrDLCaHuN1xzvVEsp5TM+q/fx29CGr/iKp3iKa7iGe7mXtazlAi6o97GrupZrKwYFlUsmmXTSa5ykK9RgIUFCLr6sVLRp4o6RMzmT/vQP+A89iSQ8eLid2wF4gid4hmfII4/csp+neIrneC5eYdcq1LwogkRtNr400vg5P+ct3uIJnuAETojKcavy4uVTPuUCLiCl7GcQg/iUT2tMwGMZG7DKPdhmq1M4JSZxKuVEJ5mKoTzyeJAHeZVXKaSQwQzmGZ7hZE4GoBWtOMCBoNcdz/HsYEdDhxuWt3iLW7k16K67LW3ZyU7H5qHGrnwVnXCG3pdQwg3cUDFQJ5lk/Pj5kA8rPlel6iKSOm5N3HGwjW3cz/28yZuOz6eQEpX24lgQhDGM4Q3eoJRSXLhIIokFLKA//eMdXoNZzWo+5VOO53iGMCRqMzSqY5cm7kZsP/vpQQ/2sz/koJw+9GElKxs4ssisYQ2LWUxrWjOCEY4dfUqp8Om0ro3YRCZyhCMhk7YPH3/hLw0cVeTOKPtRSjU87ZxsYB/zsWM5mcHQi14sZjEXcmEcIlNKJQpN3A2sJz0dB5j48PEqr8ZlJKRSKrFo4m5gd3JnUOJ24+YMzqA3veMUlVIqkWjibmAncAKLWMTpnE4KKbhxM5zhzGNevENTSiUI7ZyMg370Yw1rOMxh3Ljj
snSXUipxaeKOI12YVilVF9pUopRSCUYTt1JKJRhN3EoplWA0cSulVILRxK2UUglGE7dSSiUYTdxKKZVgNHErpVSC0cStlFIJRhO3UkolGE3cSimVYDRxK6VUgtHErZRSCUYTt1JKJRhN3EoplWA0cSulVILRxK2UUglGE7dSSiUYTdxKKZVgNHErpVSC0cStlFIJptbEbYzxGGOWG2NWGWPWGmMeaYjAlFJKOUsJY59C4GIRyTHGuICPjDHzROSzGMemlFLKQa2JW0QEyCn7p6vsIbEMSimlVGhhtXEbY5KNMV8Ce4GFIrIstmEppZQKJazELSIlItIb6AicY4w5vfo+xpixxpgsY0xWdnZ2tONUSilVJqKqEhE5CCwBhjg8N1FEMkUkMyMjI0rhKaWUqi6cqpIMY0yLst+9wCXA+lgHppRSylk4VSXtgdeMMcnYRD9NRGbHNiyllFKhhFNVshro0wCxKKWUCoOOnFRKqQSjiVsppRKMJm6llEowmriVUirBaOJWSqkEo4lbKaUSjCZupZRKMJq4lVIqwWjiVkqpBKOJWymlEowmbqWUSjCauJVSKsFo4lZKqQSjiVsppRKMJm6llEowmriVUirBaOJWSqkEo4lbKaUSjCZupZRKMJq4lVIqwWjiVkqpBKOJWymlEowmbqWUSjCauJVSKsFo4lZKqQSjiVsppRKMJm6llEowmriVUirBaOJWSqkEo4lbKaUSjCZupZRKMJq4lVIqwWjiVkqpBFNr4jbGdDLGfGCMWWeMWWuMGd8QgSmllHKWEsY+xcB9IrLSGJMOrDDGLBSRr2Mcm1JKKQe13nGLyC4RWVn2+xFgHdAh1oEppZRyFlEbtzGmC9AHWBaLYJRSStUu7MRtjEkDpgMTROSww/NjjTFZxpis7OzsaMaolFKqirAStzHGhU3ar4vI2077iMhEEckUkcyMjIxoxqiUUqqKcKpKDPAPYJ2IPB37kJRSStUknDvuc4GbgYuNMV+WPa6IcVxKKaVCqLUcUEQ+AkwDxKKUUioMOnJSKaUSjCZupZRKMJq4lVIqwWjiVkqpBKOJWymlEowmbqWUSjCauJVSKsFo4lZKqQSjiVsppRKMJm6llEowmriVUirBaOJWSqkEo4lbKaUSjCZupaKgqAgeeww6d4aMDBgzBnbvDr3/Dz/Ayy/Dn/8Ma9bUfnwReOstGDIELrkEJk+G4uLoxa8SixGRqB80MzNTsrKyon5cpRqrq6+GBQsgP9/+OyUF2raF9eshPT1w30WLYMQIm4yLiuy+N98ML74IJsQEyqNHw//9H+TmVh6/WTN48EEYN87+rhKbMWaFiGSGs6/ecStVT19/HZi0wd4NHzwIr70WuG9hIVx7rU3AeXk2cefnw+uvw7x5zsdfvRrefLMyaZcf/4cf4Ne/ht697bnUsUMTt1L1tHIlJCcHb8/Lg48+Ctz2n//YO+3qcnPhn/90Pv7ixVBa6vzc0aOwaxc8+2zo+IqL4eGHoU0bcLlgwADQP4gTmyZupeqpa1fn7amp0L174LZQCbim51q1sgk3lIICeNtxCW/rZz+DP/4R9u+3Sfyzz2DQINiwIfRrVOOmiVupeho4ELp0se3OVblctv25qgsvdE7QXi9s3Wq/BIYPhxUrKp+7+urQbd/l2rRx3p6dDVOm2Lv/qgoK4Mknaz7mseTo0Zq/VBsbTdxK1ZMxtjnjsstssna74bTT7Lbjjw/c1+uFqVPt/3o89rUej00cK1fa5D17NlxwAXz4oX1NerptQ8/IgCSH/2L9fpgwwTm2TZvsnX91JSX2fMe6zz+Hvn3t5+H32y/a6l9yjVGtiwUrpWqXkQFz5kBOjk3CrVqF3nfYMNi8Gd54w3Yq/utfsGVL5fMiNnlMmFB55z1ggG3Lnj3bbt+7135JFBbaypKhQ53PddJJ9u66uuRk6NWr9usqKLCxtGxZ+11/ovn2W7j4YvuZgb3WyZNh27bgjuLt22HPHujRA3y+
ho81iIhE/XHWWWeJUqp2paUixojYdB34SEkJ/ZqVK0XmzxfZv7/2c4waJeL1Bh7b5xNZuzb0a3Jy7OtSU0XcbpGuXUXee68uV9h43X23iMsV/L57vSLffGP3OXBA5JJLRDwekWbN7Pv29NOxiQfIkjBzrDaVKBVHxkCLFs7PtW4d+jV9+sDll9d8Z1/u5Zdh/Hhb613+2kWLbHNOKNdfb0sQCwvtXxBbttja89Wraz9folizxpZjVud2wzff2N9vuAGWLrV344cP278+fv1r+9dVPGniVqoBHT0KM2bAc8/B8uX2Hu+//zv4z2+fD+67LzrndLngiSfg0KHKtu0BA0Lvv20bvP9+cBNLQYGtTmkqzjnHJunqCgvtl9ru3fDBB/YzqyovL/7vg7ZxK9VANm+G886zNdtFRbad+fzzbSnfgQN25GRKii3Zu+uu6CXuqsJpp/7uO9uhWT1xl5bakaBNxT33wEsv2c+ivLbe67VVPSecYO/I3W6byKvbubNhY61O77iVaiDXX287FY8csUkxN9cOyHnuOXj6afvc8uW2hO/JJ50rSBpCjx7OycrlgnPPbfh4YqVDB1vTPmSIrexp0wbuv992FgN06+b8Redy2WaqciLwxRfw6afBd+exoolbqQawaxd89VVwrXB+Przyiv09LQ1OPdWWpcVTmzZw++2BzTdJSTU337z6qq1Bd7vh9NNDD99vLI4cgccfh5Ej7Zfo1Kn2i/PhhysHO6Wm2knAqr4PLhc0b24recDelXftass3hwyx89PMmtUAFxBuL2YkD60qUceKoiKRRx4Rad1aJDlZpF8/kWXLgvfbts1WJjhVj3TtGp1YDh4U+ec/Rf7yF5GvvqrfsUpKRJ57TuTEE0VathS5+mqRDRuc933hBVttUb1qZe5ckTfeELn0UpHBg0UmTxYpLq5fXNGQmyty6qmBn4ffL/LAA877v/++yJAhIqefLjJ+vMjOnXZ7YaFImzbOVSmbN0ceFxFUlWjiVqoexo4NTlp+v8jXXwfuV1oqcsopwf+Rp6aKPPRQ/eNYskQkLc0+UlNt8hg3zp43lkpKnJMXiLRoYd+Lqu/L8OGxj6k2Tl805Z9FeVIOx7vviqSnBx/H7a7bZxpJ4tamEqWwEubmAAATTUlEQVTqaP9+O2DDaTj5E0/YgR333msH57RubSsV0tNtBxjYppFTToEHHgh8fWkpPP+8bWvu2BHuvNMO/gilqMgOi8/JsY/CQtsEM2VKZGVrmzbZOb8jGVGZmxt6ZsKDBwNnNMzNtdUq5SNCq8vPt00uY8bY96+m+czrY84c59GRqam2nTpc+/c7D5M/ejR2sZfTqhKl6mjzZtumW736oqTEdlZdfLGtey7v6Js71w6Bv+suOxLv3HNtbbTLZZPWvn3Qrh2MHQvTplUml1degZkzYe1a275a3Ycf2nNWl5sLkybBlVfWfB3Fxbatd/ZsG0tJif2SWbDAjpisid9vv4wOHKh5v3J5eTZ5X3BB4PYDB2x53q5dNm6Px7ZBv/++3R5NHTvaip7q71lpKRx3XPjHueAC5/c9LQ1+9KP6xVgbveNWqo5OPNG5iiApySaAdesCqzOKiuxd2vHHw1//CtddZ6sWxo+3d+Snnmr/t/qkUEVFNrH94x/OcTglj3LhrJLz1FM2aefn20EmubmwahXcdlvtr01KsgNSqtehu93Oc6R4PM4Dix591NaPl9+hFxTYvx5uvtl5Gtz6+PnPg2NLSrIdiwMHhn+ck06Cn/40sDPZ57Ods1ddFZ1YQwq3TSWSh7Zxq6Zg716Ru+4S6dhRpFs3kb/9Lbhz7ac/dR5Ofv/9tq3Tqe33f/6n8vX33efc3ur0GDbMOc78fNu2XX1/v19k2rTar7NjR+fzud0ieXm1v760VOSZZ0QyMuzrMjJErr/euTPW7xfZsyf4GJ06Ocfg8Yjs2FF7DJGaNk2keXPbRu3z2Y7HLVsiP05pqchbb9lh8QMH2g7dgoK6
xUQ0OyeBScBe4KtwD6qJWyW6w4dFOncOnMvC5xMZOTJwv6Ii2xHVvLmdc6RvX5GPPxaZOdO548rvF3npJfvao0fDT9oul8i994aO99137bFSUyvP8+Mf287D2rRsGfqcBw6E/55lZ9svuPR0+1qv11bapKXZeT5atRJZtMgmu08+EZk+XWT7dvvak092jiElxc7JEosOzcJCkeXLRdavj/6x6yLaifsCoK8mbnUsefZZ56Tq8dgJiIqKRKZMERk61N5dvv9+YHIpKhLp0sUmrvLXGmPLBg8ftvvs2xf6rrz6w+cT2bSp5pi3bxd58kmRBx+0VSbhJruRIwPjLH/07BnZezZyZPCkTSkpIpdfLvLhh/aLats2W11Tnsw9HpF77rFxV//LBSoT/4kn1q3ELpFENXHb49FFE7c6llxzjXMCTU8Xef11W5dctdTN5xP5zW8Cj7Fjh01aKSn2ce65gbXQJSUixx3nfJ6WLW1S93pFTjhB5IMPYnet338v0rZtZeJ0u22y/PTTyI5Tfrfv9GVXLjMz+EvC7xd57TVbKlj1r4aqj6Qkke7d419KGEuRJG7tnFTKwcknO09AJGIXO/jss8BSt7w828m3Y0fltuOPh/nz7Si9Q4fs+pPdulU+n5QUPDLPGPvv996zFRYbNtiZ+QYNivYVVurY0XakPvywnSt8wgRbwdK/f2THCbWCTPn2776zo0erd6bm5sILL8C778InnwQv91Z+jO3bm9bshPURtcRtjBlrjMkyxmRlZ2dH67BKxcXPfha8zmNKCnTqZKf8rJq0qz6/eLEto+vVy1ZQnHQSTJ8eevL9G2+0tdP9+9tKlMsus1UPDzxgY9i6NXC+jKNHbangr3/tXENeV61a2Xk6Zs6086R07hz5MX70o+BFk5OTKxd5OHIkeHm3cmvX2oTeq5ctp3OSnGy/ABXaVKJUKEuX2uHoHo9tPrjoIpFdu2wnoVObcHq6Hf7uVGVS3iFZk9xcW91Q3rZujP39r3+1z2dni5x0UmUFSVqabWqpSzVELGzfLnL88YHxdehQ2QFZVGTb+EN1hL74ot3vz392bu/2++171FShbdxKRUdpqW0DrrrSzNq1zomlZUuR3r2dE1NGRu3ts88953xcr1fk0CGRW28N7vxLTrZzgcTLwYO2w7H82vLybHv1L35h5yapXk748svO7w+I9Olj98nJETnttMovsKQk+/ukSQ17bQ0tqokb+DewCygCtgNjanuNJm7V1L36qk0mzZrZO+2MDJHPPw/ssKxeXVFeTRLKmWc6v7ZZM1sS51ReWJ7Y1q1rmOsud+CA7Ux0uys7UQcMsJNK1TSR1Pr1oTsxTz21cr/cXJHnn7eTO916qy3ba+oiSdzG7h9dmZmZkpWVFfXjKtWY5OTY4eZer10gISXFjppbuzZ43xYt7KjJUHNsHzpkRxQ6jYL0eu283ZdeGrqN1+OxQ+hnzLBD0GPtggtg2bLgkaNer31uzpzg9m6w15ea6nydN91UORf2scgYs0JEMsPZV6tKlKqj8jkpBg2q7HT7/e+DOyL9fnjooZoXRpg1y7mKBez2zEz4yU9C71NQYKtWwhmmXl8bNtjV552G++fnw8cf23lZnKxZE9zpW27v3ujF2NRp4laqDvbutctePfusLdcDexd58KAtr3O7bTVIq1Z2Ho7aliErLAyd2K+6yh7rD3+w5YShqi4KC21JnVPFSzRt2xY6+YL9S+Sdd5yfO3Ik9JdPTk79YztW6OyASkXojTdg9GibaEtLbRnd//4vfPmlvdMsT5xery1vGz++9rUehwxxnhDK77cTGYFtblm1ypYbXntt6FLA3NzYrqLTq5fz0mblUlJCzyqYmelc7+3z2aXdVHj0jlupCOzbZ5N2QYFNnAUF9vG739nmjqp3u/n58PnnsGhR7cft0AEee8wm++Rkm+j9fjuDYNUpUJOSbPPM8OHOd+jt29v5v2OpbVu4447KecWrc7kqv2yqS0qC
v/3NJuryNnC/3w54aohmnqZC77iVisDs2c6dbkePOt9V5+TABx/YgTVOvvvOLh6QnW3vuj/6CP79b5v0r70WLrzQ+biPP25HV+bk2HMnJ9tOv4kTA/cvKbFTyD73nO3YvPxyu0hBly51ufpKTz8NPXvaL5vvvrN32eWdjs8/b+fzrmrHDrtAwvvv2xqS3r1ts09+vh2tOXKk7WBV4dHErVQESkps4nGSnBzcDODx2MURnMyaZZsHSkps8n3tNTj7bNsUUlMbcmGhHT3ZooU9X9u2toP0gQdsVUtVt90WuCjDtGmwcCF8/bV9XV0ZY4992212vvD//Mcm4QsvhGbNAvctKrLzXO/YUVlN8sUXtq18y5b4L46ciLSpRKkIDB3q3Ebr9Tp3uiUn27vJ6goLbflbfn5ldUZODixZYpPft986n1/ENpU88ojd5+BB23yzbp1d6qyq77+3d+9V28JLS+15nn8+rMsNi8sFl1xi75yrJ22wpYEHDgSWAJaW2rimTYteHMcSTdxKRaBdO/jLX2yidrlsm63PB+PG2bvOjh1t1Udamr2jnTPHuc152TLn44vYdQ/79rVNENV99BEsX24TfrmCAluiV70Eb/Vq5+aHwkJbstdQNm0KXt4NbH/Axo0NF0dTok0lSkXojjvsHeabb9qEdPXVNtGC/fN/9erKCZOc2sPBtgfXNPYtN9e2Y7/0UuD25cud66dzcmzCHzascluopdVSUuwyaQ2lVy97vUVFgdvT0qBPn4aLoynRxK1UHZx8sh1UU50xNlHV5uyzbeI6csT5+eJiewdfXYcOzknQ5wue0a9HD1t+t3x5YPme221LFCNx8KDt4Fy3Ds4/364FGW7b9ODB9v2qugany2X/ehkxInDfQ4dsZ+1HH9n4x42z16yqCXdsfCQPnatEqdplZdl5SEJNuuQ0eVR+vp0XxZjAfZs3d15m7OBBkf/6LzufiMtlF0y45RaR994Lb1kzEZE337TzoVSf8fCLL8K/1kOH7PqdrVqJtGghcvvtdgWgqnbsEGnfvnJyqdRUO8PgsTBPiUiUJ5mqy0MTt1LhycsTOftsOwlV9cS4eLHza9avF+nVy0436/HYyZlqS6JLltgkWD77YFqayIUX2nUXa7J7d3Bs5Y/OnaO7Is0ttzif6/TTo3eOxiySxK2dk0rFkddrF1+4+mrbBOL321GHL7wAF13k/Jru3e0ozU2bbKfkunW2LjoUEdu0kZNT2amZk2MHB734Ys3xvfWW84RQALt3w/r1tV9juObMcR49umGDbapRlTRxKxVnaWm2LG7XLpuQ9+6FW26p/XVt28KUKbZtOyMDbr0Vdu4M3m/dOvjhh+DteXnwz3/WfI78/NCdqCKhk3pdhBqJaUzo+U2OVZq4lWokWra0nXihlveq7ic/sbMRfv+9reV+/XU466zgqV+TkkIn35pmLARbtx4qnpYt7ejJaBk3Ljh5u1x2RGmopd+OVZq4lYqxxYttmV5mJvz2t853v5HasAHmzQus5y4uhsOHYdKkwH27d7frWVbn89U+P0iPHnD33cEJ3uWyMwBWH45f08jS2jzwgB2S7/XaOcXT0uzQ+X/8o27Ha8o0cSsVQ88/b5P27Nl2Dusnn7Tt0fVN3l984XwnnJdnS+mqMsYuWNyihU2GKSm2LX3QILj99trP9fTTtjRx6FBbdz1hgm2SGTCgcp9Vq+yCx263Tby33x759LIul10IYsUKW7++cKG9zjZtIjvOsUDruJWKkdxcO+Vr1SHnhYW2DfvZZ+3dd1117eo89N7tDh76Djbhfv89vP227VQ87zybeGubbrbceefZLx8nO3bY2u7ymvTCQruSzbff2kmlItWjh/M1qEp6x61UjHz5pfNdcWFh6CQYrnPOgVNOCZ6Myu22IzudpKXZTs/777eTPoWbtGvz3HPB83MXFsJnnzkv46bqTxO3UjGSkeFc3gahZwwMlzF2nu8hQ2zydrtte/CiRXa+lIa0
enXoofU6F0lsaOJWKka6dbPJtPpdt88H995b/+O3bg0zZ9qZ93btsne3/frV/7iRysx0nsyqqCi6VSeqkiZupWJo1izbGenz2SlPfT546qnQg2vqwu+3a1vGy5132sRdtenF47FzlHTrFr+4mjLtnFQqhtq1syMUN260tda9ejW9hQOOO85OUzthgi199PlsVcnvfhfvyJouTdxKNYBu3SK7+xSxVRleLxx/fOziipZu3YLnA1exo00lSjUy//mPHcZ+5pl2Tu1+/ZwXVVDHLk3cSjUi27bZgS7bt9v678JCOyBl0CDnum11bNLErVQj8vLLwYsklJTA/v12tXilQBO3Uo3K1q3ONdEidoSiUqCJW6lG5aKLnKtOSkriU6OtGidN3Eo1IjfcYKtIUlMrt/l8dqGF7t3jF5dqXDRxK9WIeL12cd///m846SQ44wz4059g8uR4R6YaEyN1nTy3BpmZmZKVlRX14yqlVFNljFkhIpnh7Kt33EoplWDCStzGmCHGmA3GmE3GmAdjHZRSSqnQak3cxphk4HngR8BpwA3GmNNiHZhSSiln4dxxnwNsEpFvReQo8AZwVWzDUkopFUo4ibsD8H2Vf28v26aUUioOwkncTgscBZWiGGPGGmOyjDFZ2dnZ9Y9MKaWUo3Cmdd0OdKry747Azuo7ichEYCKAMSbbGNMU5jNrA+yLdxANRK+1adJrTRwnhLtjrXXcxpgUYCMwGNgBfA6MFJEmvwyoMSYr3LrKRKfX2jTptTZNtd5xi0ixMeYuYAGQDEw6FpK2Uko1VmGtgCMicwFd30IppRoBHTlZs4nxDqAB6bU2TXqtTVBM5ipRSikVO3rHrZRSCUYTN7XPxWKMubWsxPHLssdt8Yizvowxk4wxe40xX4V43hhjnil7H1YbY/o2dIzREsa1DjLGHKrymf6moWOMFmNMJ2PMB8aYdcaYtcaY8Q77NInPNsxrbTKfbUgickw/sJUym4ETATewCjit2j63As/FO9YoXOsFQF/gqxDPXwHMww666g8si3fMMbzWQcDseMcZpWttD/Qt+z0dW75b/f/DTeKzDfNam8xnG+qhd9zH0FwsIrIU+KGGXa4CJov1GdDCGNO+YaKLrjCutckQkV0isrLs9yPAOoKnpWgSn22Y19rkaeIOfy6Wa8r+xHzLGNPJ4fmm4Fibl2aAMWaVMWaeMaZnvIOJBmNMF6APsKzaU03us63hWqEJfrZVaeIOby6WWUAXETkTWAS8FvOo4iOseWmaiJXACSLSC3gWeCfO8dSbMSYNmA5MEJHD1Z92eEnCfra1XGuT+2yr08QdxlwsIrJfRArL/vkycFYDxdbQwpqXpikQkcMiklP2+1zAZYxpE+ew6swY48ImstdF5G2HXZrMZ1vbtTa1z9aJJm4798opxpiuxhg38BNgZtUdqrUFDse2qzVFM4FbyioQ+gOHRGRXvIOKBWNMO2OMKfv9HOx/C/vjG1XdlF3HP4B1IvJ0iN2axGcbzrU2pc82lLCGvDdlEmIuFmPM74AsEZkJ3GOMGQ4UYzu8bo1bwPVgjPk3tse9jTFmO/BbwAUgIi9ipzW4AtgE5AGj4xNp/YVxrdcCPzPGFAP5wE+krCQhAZ0L3AysMcZ8WbbtV0BnaHKfbTjX2pQ+W0c6clIppRKMNpUopVSC0cStlFIJRhO3UkolGE3cSimVYDRxK6VUgtHErZRSCUYTt1JKJRhN3EoplWD+HyN/EW4yHiiGAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# KMeans comes from scikit-learn's clustering module.\n",
    "from sklearn.cluster import KMeans\n",
    "\n",
    "# n_init is pinned to 10 (the value this notebook was originally run with) and\n",
    "# random_state is fixed so that centroid initialisation, and therefore the\n",
    "# cluster labels and plot colours, are the same on every run.\n",
    "k_means = KMeans(n_clusters=2, n_init=10, random_state=0)\n",
    "k_means.fit(X)\n",
    "predictions = k_means.predict(X)\n",
    "\n",
    "# Colour each point by the cluster it was assigned to.\n",
    "plt.scatter(X[:, 0], X[:, 1], c=predictions, cmap='brg')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'algorithm': 'auto',\n",
       " 'copy_x': True,\n",
       " 'init': 'k-means++',\n",
       " 'max_iter': 300,\n",
       " 'n_clusters': 2,\n",
       " 'n_init': 10,\n",
       " 'n_jobs': 1,\n",
       " 'precompute_distances': 'auto',\n",
       " 'random_state': None,\n",
       " 'tol': 0.0001,\n",
       " 'verbose': 0}"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the hyperparameters the KMeans estimator is configured with.\n",
    "k_means.get_params()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Featuretools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>database_id</th>\n",
       "      <th>creation_date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2234</td>\n",
       "      <td>2018-02-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1765</td>\n",
       "      <td>2017-03-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8796</td>\n",
       "      <td>2017-05-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2237</td>\n",
       "      <td>2013-05-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3398</td>\n",
       "      <td>2012-05-09</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   database_id creation_date\n",
       "0         2234    2018-02-01\n",
       "1         1765    2017-03-02\n",
       "2         8796    2017-05-03\n",
       "3         2237    2013-05-12\n",
       "4         3398    2012-05-09"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# First dataset contains the basic information for databases.\n",
    "databases_df = pd.DataFrame({\"database_id\": [2234, 1765, 8796, 2237, 3398],\n",
    "\"creation_date\": [\"2018-02-01\", \"2017-03-02\", \"2017-05-03\", \"2013-05-12\", \"2012-05-09\"]})\n",
    "\n",
    "databases_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>transaction_id</th>\n",
       "      <th>database_id</th>\n",
       "      <th>transaction_size</th>\n",
       "      <th>transaction_date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>26482746</td>\n",
       "      <td>2234</td>\n",
       "      <td>10</td>\n",
       "      <td>2018-02-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>19384752</td>\n",
       "      <td>1765</td>\n",
       "      <td>20</td>\n",
       "      <td>2018-03-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>48571125</td>\n",
       "      <td>2234</td>\n",
       "      <td>30</td>\n",
       "      <td>2018-03-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>78546789</td>\n",
       "      <td>2237</td>\n",
       "      <td>50</td>\n",
       "      <td>2018-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>19998765</td>\n",
       "      <td>1765</td>\n",
       "      <td>100</td>\n",
       "      <td>2018-04-02</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   transaction_id  database_id  transaction_size transaction_date\n",
       "0        26482746         2234                10       2018-02-02\n",
       "1        19384752         1765                20       2018-03-02\n",
       "2        48571125         2234                30       2018-03-02\n",
       "3        78546789         2237                50       2018-04-02\n",
       "4        19998765         1765               100       2018-04-02"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Second dataset contains the information of transaction for each database id\n",
    "db_transactions_df = pd.DataFrame({\"transaction_id\": [26482746, 19384752, 48571125, 78546789, 19998765, 26482646, 12484752, 42471125, 75346789, 16498765, 65487547, 23453847, 56756771, 45645667, 23423498, 12335268, 76435357, 34534711, 45656746, 12312987],\n",
    "                \"database_id\": [2234, 1765, 2234, 2237, 1765, 8796, 2237, 8796, 3398, 2237, 3398, 2237, 2234, 8796, 1765, 2234, 2237, 1765, 8796, 2237],\n",
    "                \"transaction_size\": [10, 20, 30, 50, 100, 40, 60, 60, 10, 20, 60, 50, 40, 40, 30, 90, 130, 40, 50, 30],\n",
    "                \"transaction_date\": [\"2018-02-02\", \"2018-03-02\", \"2018-03-02\", \"2018-04-02\", \"2018-04-02\", \"2018-05-02\", \"2018-06-02\", \"2018-06-02\", \"2018-07-02\", \"2018-07-02\", \"2018-01-03\", \"2018-02-03\", \"2018-03-03\", \"2018-04-03\", \"2018-04-03\", \"2018-07-03\", \"2018-07-03\", \"2018-07-03\", \"2018-08-03\", \"2018-08-03\"]})\n",
    "\n",
    "db_transactions_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'databases': (   database_id creation_date\n",
      "0         2234    2018-02-01\n",
      "1         1765    2017-03-02\n",
      "2         8796    2017-05-03\n",
      "3         2237    2013-05-12\n",
      "4         3398    2012-05-09, 'database_id'), 'transactions': (    transaction_id  database_id  transaction_size transaction_date\n",
      "0         26482746         2234                10       2018-02-02\n",
      "1         19384752         1765                20       2018-03-02\n",
      "2         48571125         2234                30       2018-03-02\n",
      "3         78546789         2237                50       2018-04-02\n",
      "4         19998765         1765               100       2018-04-02\n",
      "5         26482646         8796                40       2018-05-02\n",
      "6         12484752         2237                60       2018-06-02\n",
      "7         42471125         8796                60       2018-06-02\n",
      "8         75346789         3398                10       2018-07-02\n",
      "9         16498765         2237                20       2018-07-02\n",
      "10        65487547         3398                60       2018-01-03\n",
      "11        23453847         2237                50       2018-02-03\n",
      "12        56756771         2234                40       2018-03-03\n",
      "13        45645667         8796                40       2018-04-03\n",
      "14        23423498         1765                30       2018-04-03\n",
      "15        12335268         2234                90       2018-07-03\n",
      "16        76435357         2237               130       2018-07-03\n",
      "17        34534711         1765                40       2018-07-03\n",
      "18        45656746         8796                50       2018-08-03\n",
      "19        12312987         2237                30       2018-08-03, 'transaction_id')}\n"
     ]
    }
   ],
   "source": [
    "# Entities for each of datasets should be defined\n",
    "entities = {\n",
    "\"databases\" : (databases_df, \"database_id\"),\n",
    "\"transactions\" : (db_transactions_df, \"transaction_id\")\n",
    "}\n",
    "\n",
    "# Relationships between tables should also be defined as below\n",
    "relationships = [(\"databases\", \"database_id\", \"transactions\", \"database_id\")]\n",
    "\n",
    "print(entities)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'featuretools'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-24-7f632589f877>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0;31m# All the pieces that are necessary to engineer features are in place, you can create your feature matrix as below\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mfeaturetools\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mft\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m \u001b[0mfeature_matrix_db_transactions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeature_defs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mft\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdfs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mentities\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mentities\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrelationships\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrelationships\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_entity\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"databases\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'featuretools'"
     ]
    }
   ],
   "source": [
    "# There are 2 entities called ‘databases’ and ‘transactions’\n",
    "# All the pieces that are necessary to engineer features are in place, you can create your feature matrix as below\n",
    "\n",
    "import featuretools as ft\n",
    "\n",
    "feature_matrix_db_transactions, feature_defs = ft.dfs(entities=entities, relationships=relationships, target_entity=\"databases\")\n",
    "\n",
    "feature_defs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Auto-sklearn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'autosklearn'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-25-6a29bd1e7dd2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# Necessary imports\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mautosklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclassification\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_selection\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdatasets\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetrics\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'autosklearn'"
     ]
    }
   ],
   "source": [
    "# Necessary imports\n",
    "import autosklearn.classification\n",
    "import sklearn.model_selection\n",
    "import sklearn.datasets\n",
    "import sklearn.metrics\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Digits dataset is one of the most popular datasets in machine learning community.\n",
    "# Every example in this datasets represents a 8x8 image of a digit.\n",
    "X, y = sklearn.datasets.load_digits(return_X_y=True)\n",
    "\n",
    "# Let's see the first image. Image is reshaped to 8x8, otherwise it's a vector of size 64.\n",
    "X[0].reshape(8,8)\n",
    "\n",
    "# Let's also plot couple of them\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "number_of_images = 10\n",
    "images_and_labels = list(zip(X, y))\n",
    "\n",
    "for i, (image, label) in enumerate(images_and_labels[:number_of_images]):\n",
    "    plt.subplot(2, number_of_images, i + 1)\n",
    "    plt.axis('off')\n",
    "    plt.imshow(image.reshape(8,8), cmap=plt.cm.gray_r, interpolation='nearest')\n",
    "    plt.title('%i' % label)\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'autosklearn' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-26-157574019f68>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;31m# Similarly to creating an estimator in Scikit-learn, we create AutoSklearnClassifier\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mautoml\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mautosklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclassification\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mAutoSklearnClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0;31m# All you need to do is to invoke fit method to start experiment with different feature engineering methods and machine learning models\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'autosklearn' is not defined"
     ]
    }
   ],
   "source": [
    "# We split our dataset to train and test data\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)\n",
    "\n",
    "# Similarly to creating an estimator in Scikit-learn, we create AutoSklearnClassifier\n",
    "automl = autosklearn.classification.AutoSklearnClassifier()\n",
    "\n",
    "# All you need to do is to invoke fit method to start experiment with different feature engineering methods and machine learning models\n",
    "automl.fit(X_train, y_train)\n",
    "\n",
    "# Generating predictions is same as Scikit-learn, you need to invoke predict method.\n",
    "y_hat = automl.predict(X_test)\n",
    "\n",
    "print(\"Accuracy score\", sklearn.metrics.accuracy_score(y_test, y_hat))\n",
    "# Accuracy score 0.98"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## MLBox"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'mlbox'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-27-4ccec4340e53>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# Necessary Imports\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mmlbox\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpreprocessing\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mmlbox\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptimisation\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mmlbox\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprediction\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mwget\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'mlbox'"
     ]
    }
   ],
   "source": [
    "# Necessary Imports\n",
    "from mlbox.preprocessing import *\n",
    "from mlbox.optimisation import *\n",
    "from mlbox.prediction import *\n",
    "import wget\n",
    "\n",
    "file_link = 'https://apsportal.ibm.com/exchange-api/v1/entries/8044492073eb964f46597b4be06ff5ea/data?accessKey=9561295fa407698694b1e254d0099600'\n",
    "file_name = wget.download(file_link)\n",
    "\n",
    "print(file_name)\n",
    "# GoSales_Tx_NaiveBayes.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "File b'GoSales_Tx_NaiveBayes.csv' does not exist",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-28-b1dd9fe7218a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'GoSales_Tx_NaiveBayes.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)\u001b[0m\n\u001b[1;32m    676\u001b[0m                     skip_blank_lines=skip_blank_lines)\n\u001b[1;32m    677\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 678\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    679\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    680\u001b[0m     \u001b[0mparser_f\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m    438\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    439\u001b[0m     \u001b[0;31m# Create the parser.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 440\u001b[0;31m     \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    441\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    442\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m    785\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_index_names'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_index_names'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    786\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 787\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    788\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    789\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[0;34m(self, engine)\u001b[0m\n\u001b[1;32m   1012\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'c'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1013\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'c'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1014\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mCParserWrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1015\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1016\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'python'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, src, **kwds)\u001b[0m\n\u001b[1;32m   1706\u001b[0m         \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'usecols'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0musecols\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1707\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1708\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparsers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTextReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1709\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1710\u001b[0m         \u001b[0mpassed_names\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnames\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._setup_parser_source\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;31mFileNotFoundError\u001b[0m: File b'GoSales_Tx_NaiveBayes.csv' does not exist"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_csv('GoSales_Tx_NaiveBayes.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'df' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-29-a06841267e05>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtest_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'PRODUCT_LINE'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;31m# First 300 records saved as test datased\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mtest_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m300\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'test_data.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'df' is not defined"
     ]
    }
   ],
   "source": [
    "test_df = df.drop(['PRODUCT_LINE'], axis = 1)\n",
    "\n",
    "# First 300 records saved as test datased\n",
    "test_df[:300].to_csv('test_data.csv')\n",
    "\n",
    "paths = [\"GoSales_Tx_NaiveBayes.csv\", \"test_data.csv\"]\n",
    "target_name = \"PRODUCT_LINE\"\n",
    "\n",
    "rd = Reader(sep = ',')\n",
    "df = rd.train_test_split(paths, target_name)\n",
    "\n",
    "dft = Drift_thresholder()\n",
    "df = dft.fit_transform(df)\n",
    "\n",
    "opt = Optimiser(scoring = 'accuracy', n_folds = 3)\n",
    "opt.evaluate(None, df)\n",
    "\n",
    "space = {\n",
    "        'ne__numerical_strategy':{\"search\":\"choice\", \"space\":[0]},\n",
    "        'ce__strategy':{\"search\":\"choice\",\n",
    "               \"space\":[\"label_encoding\",\"random_projection\", \"entity_embedding\"]},\n",
    "        'fs__threshold':{\"search\":\"uniform\", \"space\":[0.01,0.3]},\n",
    "        'est__max_depth':{\"search\":\"choice\", \"space\":[3,4,5,6,7]}\n",
    "        }\n",
    "\n",
    "best = opt.optimise(space, df,15)\n",
    "\n",
    "predictor = Predictor()\n",
    "predictor.fit_predict(best, df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TPOT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'tpot'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-30-8936f022d7e5>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mtpot\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mTPOTClassifier\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdatasets\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mload_digits\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_selection\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtrain_test_split\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;31m# Digits dataset that you have used in Auto-sklearn example\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'tpot'"
     ]
    }
   ],
   "source": [
    "from tpot import TPOTClassifier\n",
    "from sklearn.datasets import load_digits\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Digits dataset that you have used in Auto-sklearn example\n",
    "digits = load_digits()\n",
    "X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target,\n",
    "                                                    train_size=0.75, test_size=0.25)\n",
    "\n",
    "# You will create your TPOT classifier with commonly used arguments\n",
    "tpot = TPOTClassifier(generations=10, population_size=30, verbosity=2)\n",
    "\n",
    "# When you invoke fit method, TPOT will create generations of populations, seeking best set of parameters. Arguments you have used to create TPOTClassifier such as generaions and population_size will affect the search space and resulting pipeline.\n",
    "tpot.fit(X_train, y_train)\n",
    "\n",
    "print(tpot.score(X_test, y_test))\n",
    "# 0.9834"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'tpot' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-31-cdcafa9313f0>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtpot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexport\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'my_pipeline.py'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m: name 'tpot' is not defined"
     ]
    }
   ],
   "source": [
    "tpot.export('my_pipeline.py')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cat: my_pipeline.py: No such file or directory\r\n"
     ]
    }
   ],
   "source": [
    "!cat my_pipeline.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'make_pipeline' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-33-683864e61a6d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mneighbors\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mKNeighborsClassifier\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m exported_pipeline = make_pipeline(\n\u001b[0m\u001b[1;32m      7\u001b[0m     \u001b[0mStackingEstimator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mDecisionTreeClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcriterion\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"entropy\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_depth\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmin_samples_leaf\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmin_samples_split\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      8\u001b[0m     \u001b[0mKNeighborsClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_neighbors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweights\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"distance\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'make_pipeline' is not defined"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "# The three imports below are required by the pipeline definition;\n",
    "# without them the cell fails with NameError on make_pipeline.\n",
    "from sklearn.pipeline import make_pipeline\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from tpot.builtins import StackingEstimator\n",
    "\n",
    "# Two-step pipeline: a DecisionTreeClassifier wrapped in StackingEstimator,\n",
    "# followed by a KNeighborsClassifier as the final estimator.\n",
    "exported_pipeline = make_pipeline(\n",
    "    StackingEstimator(estimator=DecisionTreeClassifier(criterion=\"entropy\", max_depth=6, min_samples_leaf=2, min_samples_split=2)),\n",
    "    KNeighborsClassifier(n_neighbors=2, weights=\"distance\")\n",
    ")\n",
    "\n",
    "# X_train, y_train and X_test come from the train_test_split cell above.\n",
    "exported_pipeline.fit(X_train, y_train)\n",
    "results = exported_pipeline.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
